# Compose file format version, quoted so it is read as a string.
# NOTE(review): current Docker Compose (V2) treats the top-level `version`
# key as obsolete and ignores it; it only matters to older docker-compose
# releases — confirm which tooling this project targets before removing it.
version: '3'

# Services started by this Compose project.
services:
  # Container launched with the generate.py command line below, with all
  # NVIDIA GPUs on the host reserved for it.
  alpaca-lora:
    # Build the image from the Dockerfile in this directory and tag it
    # `alpaca-lora` (see `image` below).
    build:
      context: ./
      dockerfile: Dockerfile
      args:
        # Build argument passed as the string "0".
        # NOTE(review): presumably turns off BuildKit inline cache metadata —
        # confirm against the build setup.
        BUILDKIT_INLINE_CACHE: "0"
    image: alpaca-lora
    # Size of /dev/shm inside the container.
    shm_size: '64gb'
    # BASE_MODEL is required: `:?` makes Compose abort with the message below
    # when the variable is unset or empty, instead of substituting an empty
    # string and starting a container that receives `--base_model` without a
    # value. The folded scalar (`>-`) joins these lines with single spaces, so
    # the resulting command is one line.
    command: >-
      generate.py --load_8bit
      --base_model ${BASE_MODEL:?BASE_MODEL must be set in the environment or .env file}
      --lora_weights 'tloen/alpaca-lora-7b'
    restart: unless-stopped
    volumes:
      # Named volume mounted at /root/.cache, where downloaded weights are
      # stored, so they persist when the container is recreated.
      - alpaca-lora:/root/.cache
    ports:
      # host:container. Quoted so the mapping is always read as a string and
      # never as a YAML 1.1 base-60 number.
      # NOTE(review): 7860 is presumably the port generate.py's web UI listens
      # on — confirm.
      - "7860:7860"
    deploy:
      resources:
        reservations:
          devices:
            # Reserve every NVIDIA GPU available on the host for this service.
            - driver: nvidia
              count: all
              capabilities: [ gpu ]

# Named volumes declared by this Compose project.
volumes:
  # Mounted at /root/.cache by the alpaca-lora service.
  alpaca-lora:
    # Explicit volume name, used as-is instead of a name derived from the
    # Compose project name.
    name: alpaca-lora
